Code
library(tidyverse)
library(here)
library(gt)
library(DT)
library(kableExtra)
Alex Goodman
namesAllison <- namesA |>
filter(`Name` == 'Allison') |>
rename(sex = Gender) |>
select(Name, sex, State, Count) |>
group_by(`State`, sex) |>
summarize(total_names = sum(Count)) |>
pivot_wider(names_from = sex, values_from = total_names, values_fill = 0)
namesAllisonF <- namesAllison |>
select(`State`, `F`)
namesAllison |>
knitr::kable(format = "html",
digits = 3,
col.names =
c("State",
"F",
"M"),
caption = "Number of Allisons in the U.S by State and Sex")

| State | F | M |
|---|---|---|
| AK | 232 | 0 |
| AL | 1535 | 0 |
| AR | 1198 | 0 |
| AZ | 1880 | 0 |
| CA | 12413 | 0 |
| CO | 1594 | 0 |
| CT | 1099 | 0 |
| DC | 321 | 0 |
| DE | 294 | 0 |
| FL | 4455 | 0 |
| GA | 3257 | 0 |
| HI | 183 | 0 |
| IA | 1477 | 0 |
| ID | 451 | 0 |
| IL | 5110 | 0 |
| IN | 3067 | 0 |
| KS | 1283 | 0 |
| KY | 1905 | 20 |
| LA | 1209 | 0 |
| MA | 2218 | 0 |
| MD | 2229 | 0 |
| ME | 340 | 0 |
| MI | 4014 | 0 |
| MN | 2374 | 0 |
| MO | 2882 | 0 |
| MS | 817 | 0 |
| MT | 226 | 0 |
| NC | 3435 | 0 |
| ND | 285 | 0 |
| NE | 807 | 0 |
| NH | 412 | 0 |
| NJ | 3052 | 0 |
| NM | 399 | 0 |
| NV | 729 | 0 |
| NY | 5747 | 0 |
| OH | 5487 | 0 |
| OK | 1421 | 0 |
| OR | 1186 | 0 |
| PA | 4307 | 0 |
| RI | 306 | 0 |
| SC | 1228 | 0 |
| SD | 376 | 0 |
| TN | 2488 | 0 |
| TX | 10192 | 0 |
| UT | 1125 | 0 |
| VA | 3220 | 0 |
| VT | 135 | 0 |
| WA | 1956 | 0 |
| WI | 2367 | 0 |
| WV | 813 | 0 |
| WY | 142 | 0 |
names_viz <- namesA |>
filter(`Name` == 'Allison') |>
rename(sex = Gender)
my_viz <- names_viz |>
group_by(`Year`) |>
summarize(total_names = sum(Count))
ggplot(data = my_viz, mapping = aes(x = `Year`, y = total_names)) +
geom_line() +
geom_point() +
labs(y = NULL, title = "Popularity of the name 'Allison' in the U.S. by Count")
# A tibble: 2 × 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 209815. 42883. 4.89 0.000163
2 Year -102. 21.4 -4.75 0.000217

# The pattern of residuals appears to have a relatively u-shaped form in the middle, with a couple of smaller curves on the edges. It is pretty hard to tell whether the residuals are scattered randomly or have a nonlinear pattern, so it could really go either way. Because there tends to be a larger u-shape in the middle of the distribution, I would say a quadratic model or another nonlinear model could fit the data better than a linear one. So there seems to be a decline in the name 'Allison' and unfortunately, according to the format of the question and the model, the name isn't as 'cool' anymore.
allans <- namesA |>
filter(`Gender` == 'M', `Name` %in% c('Allan', 'Alan', 'Allen')) |>
rename(sex = Gender)
my_viz2 <- allans |>
group_by(`Year`) |>
summarize(total_names = sum(Count))
ggplot(data = my_viz2, mapping = aes(x = `Year`, y = total_names)) +
geom_line() +
geom_point() +
labs(y = NULL, title = "Popularity of the names 'Allan', 'Alan', and 'Allen' in the U.S. by Count")
# 9
allans2 <- namesA |>
rename(sex = Gender) |>
filter(Year == 2000, Name == 'Allan' | Name == 'Alan' | Name == 'Allen', State == 'PA' | State == 'CA' ) |>
select(Name, sex, State, Count) |>
group_by(`Name`, `State`) |>
summarize(total_names = sum(Count)) |>
group_by(`State`) |>
mutate(total_names = total_names / sum(total_names)) |>
pivot_wider(names_from = Name, values_from = total_names, values_fill = 0)
allans2 %>%
knitr::kable(format = "html",
digits = 3,
col.names =
c("State",
"Alan",
"Allan",
"Allen"),
caption = "Proportion of Allans, Allens, and Alans in CA and PA") %>%
kableExtra::kable_styling(font_size = 20) %>%
kableExtra::kable_classic(html_font = "verdana")

| State | Alan | Allan | Allen |
|---|---|---|---|
| CA | 0.655 | 0.147 | 0.198 |
| PA | 0.429 | 0.101 | 0.471 |
Percentage breakdown: in CA, 66% Alan, 15% Allan, and 20% Allen; in PA, 43% Alan, 10% Allan, and 47% Allen.
Challenge 9
Part 1 - completed above Part 2 - I used the font size option for the last table from the kableExtra package. I also changed the font to verdana with the kable_classic() theming option, and added a title. Part 3 - added at beginning
---
title: "Kable Work"
author: "Alex Goodman"
# HTML output options are nested under format -> html.
format:
  html:
    self-contained: true
    code-tools: true
    code-fold: true
    theme: cerulean
# Execution defaults applied to every code chunk in the document.
execute:
  echo: true
  error: true
  message: false
  warning: false
---
```{r}
library(tidyverse)
library(here)
library(gt)
library(DT)
library(kableExtra)
```
```{r}
# Read StateNames_A.csv from the project's data folder, then show the full
# data set as an interactive DT table.
namesA <- here::here("portfolio_files", "data", "StateNames_A.csv") |>
  read_csv()
namesA |>
  datatable()
```
2.
```{r}
# Total number of babies named "Allison" in each state, one column per sex.
namesAllison <- namesA |>
  filter(Name == "Allison") |>
  rename(sex = Gender) |>
  group_by(State, sex) |>
  # Drop the leftover State grouping so later steps receive a plain tibble.
  summarize(total_names = sum(Count), .groups = "drop") |>
  # A state with no records for a given sex gets 0 rather than NA.
  pivot_wider(names_from = sex, values_from = total_names, values_fill = 0)

# Female-only counts by state (`F` is the pivoted column, not FALSE).
namesAllisonF <- namesAllison |>
  select(State, `F`)

# The pivoted columns are already named State / F / M, so kable() uses them
# as-is; relabelling by position could mislabel columns or fail when the
# number of columns differs.
namesAllison |>
  knitr::kable(
    format = "html",
    digits = 3,
    caption = "Number of Allisons in the U.S by State and Sex"
  )
```
3.
```{r}
# All "Allison" records, with the Gender column exposed as `sex`.
names_viz <- rename(filter(namesA, Name == "Allison"), sex = Gender)

# Yearly total of Allisons across all states and sexes.
my_viz <- names_viz |>
  count(Year, wt = Count, name = "total_names")

# Line-and-point chart of the yearly totals; the title stands in for the
# y-axis label, which is removed.
my_viz |>
  ggplot(aes(x = Year, y = total_names)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Popularity of the name 'Allison' in the U.S. by Count",
    y = NULL
  )
```
4.
```{r}
# Simple linear regression of the yearly Allison total on Year.
allison_lm <- lm(total_names ~ Year, data = my_viz)

# Coefficient table (estimate, std. error, statistic, p-value).
allison_lm |>
  broom::tidy()
```
5.
```{r}
# Regression equation:
#   y = 209815.052 - 101.581x

# Residuals against fitted values for the linear model, drawn as points
# joined by a line.
broom::augment(allison_lm) |>
  ggplot(aes(x = .fitted, y = .resid)) +
  geom_point() +
  geom_line()

# The pattern of residuals appears to have a relatively u-shaped form in the
# middle, with a couple of smaller curves on the edges. It is pretty hard to
# tell whether the residuals are scattered randomly or have a nonlinear
# pattern, so it could really go either way. Because there tends to be a
# larger u-shape in the middle of the distribution, I would say a quadratic
# model or another nonlinear model could fit the data better than a linear
# one. So there seems to be a decline in the name 'Allison' and unfortunately,
# according to the format of the question and the model, the name isn't as
# 'cool' anymore.
```
5.
```{r}
# Male records for the three spellings, with Gender exposed as `sex`.
allans <- namesA |>
  filter(Gender == "M" & Name %in% c("Allan", "Alan", "Allen")) |>
  rename(sex = Gender)

# Yearly total across the three spellings combined.
my_viz2 <- allans |>
  count(Year, wt = Count, name = "total_names")

# Line-and-point chart of the combined yearly totals; the y-axis label is
# removed because the title already describes it.
my_viz2 |>
  ggplot(aes(x = Year, y = total_names)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Popularity of the names 'Allan', 'Alan', and 'Allen' in the U.S. by Count",
    y = NULL
  )
```
```{r}
# 9
# Share of each spelling (Alan / Allan / Allen) within CA and PA for 2000.
allans2 <- namesA |>
  filter(
    Year == 2000,
    Name %in% c("Allan", "Alan", "Allen"),
    State %in% c("PA", "CA")
  ) |>
  group_by(Name, State) |>
  # Drop the grouping left by summarize() before regrouping by State.
  summarize(total_names = sum(Count), .groups = "drop") |>
  # Convert counts to within-state proportions, so each state's row sums to 1.
  group_by(State) |>
  mutate(total_names = total_names / sum(total_names)) |>
  ungroup() |>
  pivot_wider(names_from = Name, values_from = total_names, values_fill = 0)

# The pivoted columns already carry the spellings as names, so kable() uses
# them directly rather than relabelling by position.
allans2 |>
  knitr::kable(
    format = "html",
    digits = 3,
    caption = "Proportion of Allans, Allens, and Alans in CA and PA"
  ) |>
  # Larger font, then the "classic" theme rendered in Verdana.
  kableExtra::kable_styling(font_size = 20) |>
  kableExtra::kable_classic(html_font = "verdana")
```
Percentage breakdown: in CA, 66% Alan, 15% Allan, and 20% Allen; in PA, 43% Alan, 10% Allan, and 47% Allen.
Challenge 9

- Part 1 - completed above.
- Part 2 - I used the font size option from the kableExtra package for the last table. I also changed the font to Verdana with the kable_classic() theming option, and added a title.
- Part 3 - added at the beginning.